//options
clear all
set maxvar  32000, permanently
set matsize 11000, permanently
set more off, permanently
global bootstraps 1000
set seed 23

// macros
global klmshare				: env klmshare
global projects				: env projects
global klmperry             : env klmperry
global storageb				: env storageb

global perrydatas       = "${klmperry}/CBA/data/perry/raw"
global perrydata        = "${klmperry}/CBA/data/perry/clean"
global masterinter  	= "${klmshare}/CurrentRAs/FredB/test"
global output           = "${projects}/ece_parenting/tex_files/other"
global dataperry        = "$klmperry/PerryPreschool/Data/Perry_PARI_and_Other_Data/PARI/DATA_PARI/"
global abcjpeanalysis   = "${klmshare}/Data_Central/Abecedarian/data/ABC-CARE/extensions/cba-iv/"
global dataihdp         = "${projects}/ece_parenting/data"
global dataihdp         = "$storageb/dc_data/"

cd  $dataihdp
use ihdp_data, clear
rename _all, lower

replace   bw = bw/1000

// impute iq
replace   stndscor =  iqcage     if stndscor ==.
replace   iqcage   =  stndscor   if iqcage ==.
foreach var of varlist stndscor iqcage { 
	replace `var' = mdi_24cor    if stndscor ==. & iqcage ==.
}
replace   stndscor =  iqcage     if stndscor ==.
replace   iqcage   =  stndscor   if iqcage ==.

// low birthweight
gen lb = 1
rename tot_siblings_natural siblings_natural

// list relevant variables
global baseline_child      sex bw anga black hispanic 
global baseline_mother     mage meduc works married 
global baseline_household  welfare siblings_natural employed_adult
global baseline_economy    employment medinc gpc
global outputs             iqcage stndscor 
global inputs              cum_avg_daycare_36m_sum

// mark baseline sample
reg  $baseline_child $baseline_mother $baseline_household $baseline_economy $outputs $inputs
gen  sample1 = e(sample) 
keep if sample1 == 1

// footnote regression
gen     bwg_nh = 1 if bwg == "H"
replace bwg_nh = 0 if bwg == "L"
reg     bwg_nh twin

// parenting latents, ages 1 and 3
foreach var of varlist alt_subscale_1_12-alt_subscale_6_12 alt_subscale_1_36-alt_subscale_8_36 {
	summ `var'
	gen  `var'_std = (`var' - r(mean))/r(sd)
}
# delimit
sem (_cons@0 X -> alt_subscale_1_12_std) (_cons@0 X -> alt_subscale_2_12_std) (_cons@0 X -> alt_subscale_3_12_std) 
	(_cons@0 X -> alt_subscale_4_12_std) (_cons@0 X -> alt_subscale_5_12_std) (_cons@0 X -> alt_subscale_6_12_std), method(adf);
predict parenting_age1, latent;
sem (_cons@0 X -> alt_subscale_1_36_std) (_cons@0 X -> alt_subscale_2_36_std) (_cons@0 X -> alt_subscale_3_36_std) (_cons@0 X -> alt_subscale_4_36_std)
	(_cons@0 X -> alt_subscale_5_36_std) (_cons@0 X -> alt_subscale_6_36_std) (_cons@0 X -> alt_subscale_7_36_std) (_cons@0 X -> alt_subscale_8_36_std), 
																																						 cov(e.alt_subscale_1_36_std*e.alt_subscale_2_36_std)
																																						 cov(e.alt_subscale_3_36_std*e.alt_subscale_4_36_std)
																																						 cov(e.alt_subscale_5_36_std*e.alt_subscale_6_36_std)
																																						 cov(e.alt_subscale_7_36_std*e.alt_subscale_8_36_std)
																																						 cov(e.alt_subscale_4_36_std*e.alt_subscale_7_36_std)
																																						 cov(e.alt_subscale_2_36_std*e.alt_subscale_7_36_std) method(adf);		
 predict parenting_age3, latent;
# delimit cr
egen    parenting_ages13 = rowmean(parenting_age1 parenting_age3)

// standardize latents in sample
foreach var of varlist parenting_ages13 {
	summ    `var' if tg == 0
	gen     `var'_std = (`var' - r(mean))/r(sd)
	replace `var'_std = `var'_std + 100
}
// log and standardize childcare variable 
summ    cum_avg_daycare_36m_sum  if tg == 0 
replace cum_avg_daycare_36m_sum  = (cum_avg_daycare_36m_sum    - r(mean))/r(sd)
replace cum_avg_daycare_36m_sum  =  cum_avg_daycare_36m_sum    + r(mean)

rename cum_avg_daycare_36m_sum c
rename parenting_ages13_std    p

global inputs_std c p
keep   ihdp site tg bwg twin $baseline_child $baseline_mother $baseline_household $baseline_economy

global sample_all          if tg != .
global sample_control      if tg == 0
global sample_treatment    if tg == 1

// correlations
foreach group in all control treatment {
	matrix all_`group' = J(1,1,.)
	foreach var of varlist tg $baseline_child $baseline_mother $baseline_household $baseline_economy {
		correlate `var' twin ${sample_`group'}
		matrix `var'_`group' = [r(rho)]
		matrix all_`group' = [`var'_`group' \ all_`group']
	}
	matrix all_`group' = all_`group'[1..16,1...]
	matrix colnames all_`group' = rho_`group'
}

// joint test
reg twin tg $baseline_child $baseline_mother $baseline_household $baseline_economy, robust
test     tg $baseline_child $baseline_mother $baseline_household $baseline_economy
local F = r(F)
local p = r(p)
local N = e(N)
foreach stat in F p {
	local `stat' = string(``stat'',"%9.2fc")
}
foreach stat in N {
	local `stat' = string(``stat'',"%9.0fc")
}


// plot
# delimit
global box  text(14.75 -.0485
	 "{bf: Joint Test}"
	 "{bf: {it:F} = `F' ({it:p}-value = `p')}"
	 "{bf: {it:N} = `N'}" , place(c) box size(small) just(c) margin(l+1 b+1 t+1 r+1) fcolor(none)); 
# delimit cr

clear
foreach group in all control treatment {
	svmat all_`group', names(col)
}

gen n = _n

global baseline_child      sex bw anga black hispanic 
global baseline_mother     mage meduc works married 
global baseline_household  welfare siblings_natural employed_adult
global baseline_economy    employment medinc gpc
global outputs             iqcage stndscor 
global inputs              cum_avg_daycare_36m_sum

#delimit
twoway  (scatter n rho_all      , msize(med)  mlcolor(gs0) mfcolor(gs0) msymbol(circle) xline(0, lpattern(solid)  lcolor(gs12) lwidth(vvthick))
																						xline(-.07, lpattern(solid)  lcolor(gs12)))



	
,	  
		  

		legend(off)
			   
		  xlabel(-.07[.035].075, labsize(medsmall) grid   glcolor(gs12) glpattern(solid)) 
		  ylabel(16 "Treatment"                     15 "Male"                  14 "Birthweight"     13 "Gestational Age" 12 "Black" 11 "Hispanic"
		         10 "Maternal Age"                   9 "Maternal Education"     8 "Mother Works"     7 "Married Mother"
				  6 "Household Uses Social Programs" 5 "Siblings"               4 "Employed Adults" 
				  3 "% Employed"                     2 "Median Income"          1 "Government Expenditure per Capita",   labsize(small) angle(h) glcolor(gs12) glpattern(solid)) 
		  
		  ytitle("", size(medium))
		  xtitle("Correlation")
		  graphregion(color(white)) plotregion(color(white)) $box;
#delimit cr
cd $output
graph export twinningcorrelations.eps, replace
